import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# ============= Exploring the MNIST dataset ==============
# Load MNIST through the Keras dataset helper; it returns a
# (train, test) pair of (images, labels) tuples.
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Report the array shapes of both splits to understand the data layout.
print('Train image shape:', train_images.shape, 'Train label shape:', train_labels.shape)
print('Test image shape:', test_images.shape, 'Test label shape:', test_labels.shape)

# Dump one concrete sample (index 1): its raw pixel values and its label.
sample_image, sample_label = train_images[1], train_labels[1]
print('image data:', sample_image)
print('label data:', sample_label)


# Visualize a single image
def plot_image(image):
    """Display one digit as a 28x28 picture using the 'binary' colormap.

    `image` must offer a `.reshape` method and hold exactly 784 values
    (either already 28x28 or flattened); the call blocks in `plt.show()`.
    """
    pixels = image.reshape(28, 28)
    plt.imshow(pixels, cmap='binary')
    plt.show()


# plot_image(train_images[1])
# plot_image(train_images[20000])

# ============= Dataset split ===============
total_num = len(train_images)
valid_split = 0.2  # fraction of the original training data held out for validation
train_num = int(total_num*(1 - valid_split))  # number of samples kept for training

# The first `train_num` samples train the model, the remainder validate it;
# the official test split is used unchanged.
train_x, valid_x = train_images[:train_num], train_images[train_num:]
train_y, valid_y = train_labels[:train_num], train_labels[train_num:]
test_x, test_y = test_images, test_labels

# Features: flatten every (28, 28) image into a row of 784 values, scale the
# pixel values by 1/255 (done on the NumPy array, as before) and cast the
# result to a float32 tensor.
train_x, valid_x, test_x = (
    tf.cast(features.reshape(-1, 784) / 255.0, tf.float32)
    for features in (train_x, valid_x, test_x)
)

# Labels: one-hot encode the digit classes (depth 10).
train_y, valid_y, test_y = (
    tf.one_hot(targets, depth=10)
    for targets in (train_y, valid_y, test_y)
)


# ============= Model and related computations ===============
# Model definition
def model(x, w, b):
    """Apply the linear map `x @ w + b` and return its softmax.

    In this script `x` holds flattened images with 784 features per row,
    `w` is the (784, 10) weight matrix and `b` the length-10 bias, so each
    output row is a distribution over the 10 digit classes.
    """
    logits = tf.matmul(x, w) + b
    probabilities = tf.nn.softmax(logits, axis=-1)
    return probabilities


# Cross-entropy loss
def loss(x, y, w, b):
    """Return the mean categorical cross-entropy of the model on (x, y).

    `y` holds the target distributions (one-hot labels in this script);
    the per-sample losses are averaged into a single scalar tensor.
    """
    probabilities = model(x, w, b)
    # Per-sample difference between the predicted and the target distribution.
    per_sample = tf.keras.losses.categorical_crossentropy(y, probabilities)
    return tf.reduce_mean(per_sample)


# Gradient computation
def grad(x, y, w, b):
    """Return the gradients of the loss on (x, y) with respect to [w, b].

    The result is a two-element list ordered like `[w, b]`, ready to be
    zipped with the variables for an optimizer step.
    """
    parameters = [w, b]
    # Record the forward pass so the tape can differentiate through it.
    with tf.GradientTape() as tape:
        batch_loss = loss(x, y, w, b)
    return tape.gradient(batch_loss, parameters)


# Accuracy metric
def accuracy(x, y, w, b):
    """Return the fraction of rows in `x` whose predicted class matches `y`.

    The predicted class is the arg-max of the model output along axis 1 and
    the true class is the arg-max of `y` along axis 1.
    """
    predicted_class = tf.argmax(model(x, w, b), axis=1)
    true_class = tf.argmax(y, axis=1)
    # Booleans become 1.0 / 0.0, so their mean is the hit rate.
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits)


# ================= Model training ===================
# Variables to optimize: a (784, 10) weight matrix drawn from a standard
# normal distribution and a length-10 bias initialised to zero.
W = tf.Variable(tf.random.normal([784, 10], mean=0.0, stddev=1.0, dtype=tf.float32))
B = tf.Variable(tf.zeros(10), dtype=tf.float32)

# Training hyper-parameters
training_epochs = 20  # number of passes over the training set
batch_size = 50  # samples per gradient step
learning_rate = 0.001  # optimizer step size
# Adam optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)

# Number of mini-batches per epoch. Ceiling division is used so that a
# trailing partial batch is still trained on when `train_num` is not a
# multiple of `batch_size` (the final slice below is simply shorter).
total_step = (train_num + batch_size - 1) // batch_size
loss_list_train = []  # per-epoch loss on the training set
loss_list_valid = []  # per-epoch loss on the validation set
acc_list_train = []  # per-epoch accuracy on the training set
acc_list_valid = []  # per-epoch accuracy on the validation set

for epoch in range(training_epochs):
    # One epoch: walk through the training set in consecutive mini-batches.
    for step in range(total_step):
        xs = train_x[step * batch_size:(step + 1) * batch_size]
        ys = train_y[step * batch_size:(step + 1) * batch_size]
        grads = grad(xs, ys, W, B)  # gradients of the batch loss w.r.t. [W, B]
        optimizer.apply_gradients(zip(grads, [W, B]))  # update W and B in place

    # End-of-epoch metrics, evaluated on the full training and validation sets.
    loss_train = loss(train_x, train_y, W, B).numpy()
    loss_valid = loss(valid_x, valid_y, W, B).numpy()
    acc_train = accuracy(train_x, train_y, W, B).numpy()
    acc_valid = accuracy(valid_x, valid_y, W, B).numpy()
    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)
    acc_list_train.append(acc_train)
    acc_list_valid.append(acc_valid)
    print('epoch=%3d, train_loss=%.4f, train_acc=%.4f, valid_loss=%.4f, valid_acc=%.4f'
          % (epoch + 1, loss_train, acc_train, loss_valid, acc_valid))

# Plot the training history: loss curves first, then accuracy curves.
plt.plot(loss_list_train, color='blue', label='Train Loss')
plt.plot(loss_list_valid, color='red', label='Valid Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc=1)  # loc selects where the legend is placed
plt.show()

plt.plot(acc_list_train, color='blue', label='Train Acc')
plt.plot(acc_list_valid, color='red', label='Valid Acc')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc=1)  # loc selects where the legend is placed
plt.show()

# Evaluate the trained model on the test set.
test_acc = accuracy(test_x, test_y, W, B).numpy()
print('Test accuracy:', test_acc)


# ================= Using the model for prediction ===================
# If the accuracy is acceptable, the model can be used to make predictions.
def predict(x, w, b):
    """Return, as a NumPy array, the arg-max class index for each row of `x`."""
    probabilities = model(x, w, b)
    class_ids = tf.argmax(probabilities, axis=1)
    return class_ids.numpy()


# Predict a class for every test image and print the first prediction.
pred_test = predict(x=test_x, w=W, b=B)
print(pred_test[0])


# ================= Visualizing predictions ===================
def plot_images_labels_prediction(images,  # sequence of images
                                  labels,  # sequence of true labels
                                  preds,  # sequence of predictions (may be empty or None)
                                  index=0,  # position of the first sample to show
                                  num=10):  # number of samples to show (10 by default)
    """Show up to 10 consecutive samples in a 2 x 5 grid with their labels.

    Each panel is titled ``label=<label>``; when `preds` is non-empty the
    title also carries ``,predict=<prediction>``.

    Args:
        images: indexable collection of images, each reshapeable to (28, 28).
        labels: true labels, indexed like `images`.
        preds: predictions, indexed like `images`; an empty sequence or
            None suppresses the prediction part of the title.
        index: position of the first sample to display.
        num: how many samples to display; capped at 10 (the grid size) and
            at the number of images available from `index` onwards.

    Raises:
        IndexError: if `index` lies at or beyond the end of `images`.
    """
    total = len(images)
    if index >= total:
        # Fail before touching the figure instead of halfway through drawing.
        raise IndexError('index %d is out of range for %d images' % (index, total))
    # The grid has 10 panels; also stop at the last available image rather
    # than indexing past the end of `images`.
    num = min(num, 10, total - index)
    show_preds = preds is not None and len(preds) > 0

    fig = plt.gcf()  # reuse the current figure (Get Current Figure)
    fig.set_size_inches(10, 4)  # size in inches; 1 inch equals 2.54 cm
    for panel, sample in enumerate(range(index, index + num), start=1):
        ax = plt.subplot(2, 5, panel)  # select the panel to draw into
        ax.imshow(np.reshape(images[sample], (28, 28)), cmap='binary')
        title = 'label=' + str(labels[sample])
        if show_preds:
            title += ',predict=' + str(preds[sample])

        ax.set_title(title, fontsize=10)
        ax.set_xticks([])  # hide the axis ticks
        ax.set_yticks([])
    plt.show()


# Show ten test images, starting at position 10, with their labels and predictions.
plot_images_labels_prediction(test_images, test_labels, pred_test, index=10, num=10)
